Resources for Learning R

DataCamp Intro To R

  • Truly introductory
  • Code In Browser, has hints

Install R

RStudio Cloud

FreeCodeCamp Intro Video

R Basics

This is an R Markdown file. It lets you run the code one block at a time or render a full report with knitr.

Math

# Arithmetic demo. Each print() echoes the expression as text; the bare
# expression on the following line is then evaluated and its value shown.
# Lines starting with "##" are the recorded console output.
print("1 + 1")
## [1] "1 + 1"
1 + 1
## [1] 2
print("13* 13")
## [1] "13* 13"
13* 13
## [1] 169
print("25 / 5")
## [1] "25 / 5"
25 / 5
## [1] 5
# `/` is ordinary (non-integer) division, so 21 / 5 gives 4.2.
print("21 / 5")
## [1] "21 / 5"
21 / 5
## [1] 4.2
# `%%` is the modulo operator: the remainder of 21 divided by 5.
print("21 %% 5")
## [1] "21 %% 5"
21 %% 5
## [1] 1
# `^` is exponentiation.
print("5^3")
## [1] "5^3"
5^3
## [1] 125

Variables

# Variables demo: the same calculations as the arithmetic section, with
# `five` and `thirteen` substituted for the literals. The print() labels
# still show the literal form of each expression.
# `->` (rightward) and `=` both assign here; `<-` is the conventional
# assignment operator in R code.
5 -> five
thirteen = 13
print("1 + 1")
## [1] "1 + 1"
1 + 1
## [1] 2
print("13* 13")
## [1] "13* 13"
thirteen* thirteen
## [1] 169
print("25 / 5")
## [1] "25 / 5"
# 25 is built as (20 + five) so the variable appears on both sides.
(20 + five) / five
## [1] 5
print("21 / 5")
## [1] "21 / 5"
21 / five
## [1] 4.2
print("21 %% 5")
## [1] "21 %% 5"
21 %% five
## [1] 1
print("5^3")
## [1] "5^3"
five^3
## [1] 125

Vectors

# Sample names used as input for the substring classifier defined below.
names <- c(
  "test",
  "Myocarditis",
  "beta coronavirus",
  "novel corona 2019",
  "Encephalitis",
  "Hepatitis A",
  "influenza",
  "coronaitis"
)
print(names)
## [1] "test"              "Myocarditis"       "beta coronavirus" 
## [4] "novel corona 2019" "Encephalitis"      "Hepatitis A"      
## [7] "influenza"         "coronaitis"
# Classify a single name by the substring it contains (case-insensitive).
#
# Returns "itis" when the name contains "itis", otherwise "corona" when it
# contains "corona", otherwise "Neither". "itis" takes priority when both
# substrings are present (e.g. "coronaitis").
which_name <- function(name) {
  if (grepl("itis", name, ignore.case = TRUE)) {
    return("itis")
  }
  if (grepl("corona", name, ignore.case = TRUE)) {
    return("corona")
  }
  "Neither"
}

# Smoke test: classify every sample name to confirm which_name() works.
# vapply() pins the result to one character string per name; sapply() would
# silently return a list instead of a character vector for empty input.
vapply(names, which_name, character(1))
##              test       Myocarditis  beta coronavirus novel corona 2019 
##         "Neither"            "itis"          "corona"          "corona" 
##      Encephalitis       Hepatitis A         influenza        coronaitis 
##            "itis"            "itis"         "Neither"            "itis"

COVID DATASET

  • From the Johns Hopkins dataset available on GitHub
  • COVID-19/csse_covid_19_data/csse_covid_19_daily_reports/

Read in Files

Replace with a newer COVID file for the most up-to-date info

# Load the US daily report. Point `file` at a newer report to refresh the data.
covid_raw <- read.csv(file = "06-28-2020-us.csv", header = TRUE)
# Interactive alternative (disabled): pick the file through a dialog.
#covid_ts_confirmed = read.csv(file.choose(),header = TRUE)

Look at Raw Data

# Preview the first rows of the raw report to check the columns and types.
head(covid_raw)
##   Province_State Country_Region         Last_Update      Lat     Long_
## 1        Alabama             US 2020-06-29 04:33:57  32.3182  -86.9023
## 2         Alaska             US 2020-06-29 04:33:57  61.3707 -152.4044
## 3 American Samoa             US 2020-06-29 04:33:57 -14.2710 -170.1320
## 4        Arizona             US 2020-06-29 04:33:57  33.7298 -111.4312
## 5       Arkansas             US 2020-06-29 04:33:57  34.9697  -92.3731
## 6     California             US 2020-06-29 04:33:57  36.1162 -119.6816
##   Confirmed Deaths Recovered Active FIPS Incident_Rate People_Tested
## 1     35441    919     18866  15656    1      722.8159        386280
## 2       880     14       521    345    2      120.2934        108300
## 3         0      0        NA      0   60        0.0000           696
## 4     73920   1594      8926  63400    4     1015.5636        509896
## 5     19818    264     13270   6284    5      656.7027        291222
## 6    215296   5932        NA 209364    6      544.8846       3955952
##   People_Hospitalized Mortality_Rate      UID ISO3 Testing_Rate
## 1                2703       2.593042 84000001  USA     7878.145
## 2                  NA       1.590909 84000002  USA    14804.284
## 3                  NA             NA       16  ASM     1250.876
## 4                4617       2.156385 84000004  USA     7005.301
## 5                1373       1.332122 84000005  USA     9650.130
## 6                  NA       2.755276 84000006  USA    10011.970
##   Hospitalization_Rate
## 1             7.626760
## 2                   NA
## 3                   NA
## 4             6.245942
## 5             6.928045
## 6                   NA
#covid_ts_confirmed

Plot Data

Initial look into plotting the data points. Note here

#install.packages("tidyverse")
library(tidyverse)
## ── Attaching packages ────────────────────────────────────────────────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.0     ✓ purrr   0.3.4
## ✓ tibble  3.0.1     ✓ dplyr   1.0.0
## ✓ tidyr   1.1.0     ✓ stringr 1.4.0
## ✓ readr   1.3.1     ✓ forcats 0.5.0
## ── Conflicts ───────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(ggplot2)
library(GGally)
## Registered S3 method overwritten by 'GGally':
##   method from   
##   +.gg   ggplot2
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
# Quick overview: base plot() on a data frame draws a pairwise scatterplot
# matrix of its columns.
plot(covid_raw)

# Deaths vs confirmed cases for the first ten rows of the report.
byState10 <- covid_raw %>%
  head(10) %>%
  ggplot(aes(x = Deaths, y = Confirmed, fill = Province_State)) +
  geom_point(aes(fill = Province_State)) +
  ggtitle("Deaths vs Confirmed Cases in First Ten Provinces")

ggplotly(byState10)

# Same plot for every row in the report.
byState <- covid_raw %>%
  ggplot(aes(x = Deaths, y = Confirmed, fill = Province_State)) +
  geom_point(aes(fill = Province_State)) +
  ggtitle("Deaths vs Confirmed Cases per Province")
ggplotly(byState)

# Exclude New York and New Jersey. The title now states the exclusion so
# this plot can be told apart from the unfiltered one above.
byStateNoNY <- covid_raw %>%
  filter(Province_State != "New York" & Province_State != "New Jersey") %>%
  ggplot(aes(x = Deaths, y = Confirmed, fill = Province_State)) +
  geom_point(aes(fill = Province_State)) +
  ggtitle("Deaths vs Confirmed Cases per Province (Minus New York and New Jersey)")
ggplotly(byStateNoNY)

# Split the states at 700 deaths: 700 or more first, then fewer than 700.
statesAbove700Deaths <- covid_raw %>%
  filter(Deaths >= 700) %>%
  ggplot(aes(x = Deaths, y = Confirmed, fill = Province_State)) +
  geom_point(aes(fill = Province_State)) +
  ggtitle("Over 700 Mortality States")
ggplotly(statesAbove700Deaths)

# Stored under its own name; previously this overwrote statesAbove700Deaths
# with the under-700 plot.
statesBelow700Deaths <- covid_raw %>%
  filter(Deaths < 700) %>%
  ggplot(aes(x = Deaths, y = Confirmed, fill = Province_State)) +
  geom_point(aes(fill = Province_State)) +
  ggtitle("Under 700 Mortality States")
ggplotly(statesBelow700Deaths)

Map data using FIPS codes

library(e1071)
library(usmap)

# Keep the US rows that have a FIPS code.
# is.na() is the proper missing-value test. The previous comparison against
# the string 'NA' only dropped those rows because comparing a missing value
# yields NA, and filter() discards rows whose condition is NA.
UScovid_dataset <- filter(
  covid_raw,
  Country_Region == "US",
  !is.na(FIPS)
)
UScovid_dataset
##              Province_State Country_Region         Last_Update      Lat
## 1                   Alabama             US 2020-06-29 04:33:57  32.3182
## 2                    Alaska             US 2020-06-29 04:33:57  61.3707
## 3            American Samoa             US 2020-06-29 04:33:57 -14.2710
## 4                   Arizona             US 2020-06-29 04:33:57  33.7298
## 5                  Arkansas             US 2020-06-29 04:33:57  34.9697
## 6                California             US 2020-06-29 04:33:57  36.1162
## 7                  Colorado             US 2020-06-29 04:33:57  39.0598
## 8               Connecticut             US 2020-06-29 04:33:57  41.5978
## 9                  Delaware             US 2020-06-29 04:33:57  39.3185
## 10         Diamond Princess             US 2020-06-29 04:33:57       NA
## 11     District of Columbia             US 2020-06-29 04:33:57  38.8974
## 12                  Florida             US 2020-06-29 04:33:57  27.7663
## 13                  Georgia             US 2020-06-29 04:33:57  33.0406
## 14           Grand Princess             US 2020-06-29 04:33:57       NA
## 15                     Guam             US 2020-06-29 04:33:57  13.4443
## 16                   Hawaii             US 2020-06-29 04:33:57  21.0943
## 17                    Idaho             US 2020-06-29 04:33:57  44.2405
## 18                 Illinois             US 2020-06-29 04:33:57  40.3495
## 19                  Indiana             US 2020-06-29 04:33:57  39.8494
## 20                     Iowa             US 2020-06-29 04:33:57  42.0115
## 21                   Kansas             US 2020-06-29 04:33:57  38.5266
## 22                 Kentucky             US 2020-06-29 04:33:57  37.6681
## 23                Louisiana             US 2020-06-29 04:33:57  31.1695
## 24                    Maine             US 2020-06-29 04:33:57  44.6939
## 25                 Maryland             US 2020-06-29 04:33:57  39.0639
## 26            Massachusetts             US 2020-06-29 04:33:57  42.2302
## 27                 Michigan             US 2020-06-29 04:33:57  43.3266
## 28                Minnesota             US 2020-06-29 04:33:57  45.6945
## 29              Mississippi             US 2020-06-29 04:33:57  32.7416
## 30                 Missouri             US 2020-06-29 04:33:57  38.4561
## 31                  Montana             US 2020-06-29 04:33:57  46.9219
## 32                 Nebraska             US 2020-06-29 04:33:57  41.1254
## 33                   Nevada             US 2020-06-29 04:33:57  38.3135
## 34            New Hampshire             US 2020-06-29 04:33:57  43.4525
## 35               New Jersey             US 2020-06-29 04:33:57  40.2989
## 36               New Mexico             US 2020-06-29 04:33:57  34.8405
## 37                 New York             US 2020-06-29 04:33:57  42.1657
## 38           North Carolina             US 2020-06-29 04:33:57  35.6301
## 39             North Dakota             US 2020-06-29 04:33:57  47.5289
## 40 Northern Mariana Islands             US 2020-06-29 04:33:57  15.0979
## 41                     Ohio             US 2020-06-29 04:33:57  40.3888
## 42                 Oklahoma             US 2020-06-29 04:33:57  35.5653
## 43                   Oregon             US 2020-06-29 04:33:57  44.5720
## 44             Pennsylvania             US 2020-06-29 04:33:57  40.5908
## 45              Puerto Rico             US 2020-06-29 04:33:57  18.2208
## 46             Rhode Island             US 2020-06-29 04:33:57  41.6809
## 47           South Carolina             US 2020-06-29 04:33:57  33.8569
## 48             South Dakota             US 2020-06-29 04:33:57  44.2998
## 49                Tennessee             US 2020-06-29 04:33:57  35.7478
## 50                    Texas             US 2020-06-29 04:33:57  31.0545
## 51                     Utah             US 2020-06-29 04:33:57  40.1500
## 52                  Vermont             US 2020-06-29 04:33:57  44.0459
## 53           Virgin Islands             US 2020-06-29 04:33:57  18.3358
## 54                 Virginia             US 2020-06-29 04:33:57  37.7693
## 55               Washington             US 2020-06-29 04:33:57  47.4009
## 56            West Virginia             US 2020-06-29 04:33:57  38.4912
## 57                Wisconsin             US 2020-06-29 04:33:57  44.2685
## 58                  Wyoming             US 2020-06-29 04:33:57  42.7560
##        Long_ Confirmed Deaths Recovered Active  FIPS Incident_Rate
## 1   -86.9023     35441    919     18866  15656     1     722.81588
## 2  -152.4044       880     14       521    345     2     120.29335
## 3  -170.1320         0      0        NA      0    60       0.00000
## 4  -111.4312     73920   1594      8926  63400     4    1015.56359
## 5   -92.3731     19818    264     13270   6284     5     656.70269
## 6  -119.6816    215296   5932        NA 209364     6     544.88455
## 7  -105.3111     32290   1676      4442  26172     8     560.71332
## 8   -72.7554     46303   4316      8053  33934     9    1298.71733
## 9   -75.5071     11226    507      6665   4054    10    1152.84607
## 10        NA        49      0        NA     49 88888            NA
## 11  -77.0268     10248    550      1199   8499    11    1452.07432
## 12  -81.6868    141075   3419        NA 137656    12     656.84294
## 13  -83.6431     77210   2778        NA  74432    13     727.20094
## 14        NA       103      3        NA    100 99999            NA
## 15  144.7937       247      5       179     63    66     150.39975
## 16 -157.4983       899     18       714    167    15      63.49444
## 17 -114.4788      5322     91      3898   1333    16     297.80674
## 18  -88.9861    141723   6888        NA 134835    17    1118.41068
## 19  -86.2583     44930   2619     33935   8376    18     667.38768
## 20  -93.2105     28520    706     17620  10194    19     903.94191
## 21  -96.7265     13847    269       779  12799    20     475.30064
## 22  -84.6701     15232    558      3730  10944    21     340.93811
## 23  -91.8678     56236   3199     39792  13245    22    1209.69008
## 24  -69.3819      3191    104      2577    510    23     237.38815
## 25  -76.8021     66777   3168      4976  58633    24    1104.54076
## 26  -71.5301    108667   8059        NA 100608    25    1576.59706
## 27  -84.5361     69946   6157     51099  12690    26     700.38051
## 28  -93.9002     35549   1460     30809   3280    27     630.34255
## 29  -89.6787     25892   1039     17242   7611    28     869.98332
## 30  -92.2884     20689   1004        NA  19685    29     337.09560
## 31 -110.4544       863     22       604    237    30      80.74642
## 32  -98.2681     18899    267     13053   5579    31     976.99141
## 33 -117.0554     17160    500       684  15976    32     557.11464
## 34  -71.5639      5747    367      4401    979    33     422.66335
## 35  -74.5210    171182  14975     30092 126115    34    1927.24992
## 36 -106.2485     11809    492      5251   6066    35     563.18374
## 37  -74.9481    392539  31397     70010 291132    36    2017.82594
## 38  -79.8064     62248   1352     36921  23975    37     593.51165
## 39  -99.7840      3495     79      3139    277    38     458.62410
## 40  145.6739        30      2        19      9    69      54.40302
## 41  -82.7649     50309   2807        NA  47502    39     430.39242
## 42  -96.9289     12947    385      9397   3165    40     327.19472
## 43 -122.0709      8341    202      2649   5490    41     197.76008
## 44  -77.2098     89863   6606     66686  16571    42     701.94561
## 45  -66.5901      7189    153        NA   7036    72     245.07331
## 46  -71.5118     16661    927      1600  14134    44    1572.74055
## 47  -80.9450     33320    716     13456  19148    45     647.15189
## 48  -99.4388      6681     91      5752    838    46     755.20624
## 49  -86.6923     40172    584     26159  13429    47     588.24098
## 50  -97.5635    150152   2402     79974  67776    48     517.83907
## 51 -111.8624     21100    167     11931   9002    49     658.14961
## 52  -72.7107      1202     56       946    200    50     192.63160
## 53  -64.8963        81      6        71      4    78      75.51180
## 54  -78.1700     61736   1732      8005  51999    51     723.28349
## 55 -121.4905     31752   1310        NA  30442    53     416.97237
## 56  -80.9545      2832     93      2062    677    54     158.02275
## 57  -89.6165     27743    777     21953   5013    55     476.48458
## 58 -107.3025      1417     20      1057    340    56     244.83421
##    People_Tested People_Hospitalized Mortality_Rate      UID ISO3 Testing_Rate
## 1         386280                2703      2.5930420 84000001  USA    7878.1445
## 2         108300                  NA      1.5909091 84000002  USA   14804.2841
## 3            696                  NA             NA       16  ASM    1250.8762
## 4         509896                4617      2.1563853 84000004  USA    7005.3005
## 5         291222                1373      1.3321223 84000005  USA    9650.1297
## 6        3955952                  NA      2.7552765 84000006  USA   10011.9702
## 7         313711                5399      5.1904614 84000008  USA    5447.5670
## 8         438623               10268      9.3212103 84000009  USA   12302.6000
## 9         106346                  NA      4.5163014 84000010  USA   10921.1267
## 10            NA                  NA      0.0000000 84088888  USA           NA
## 11         93132                  NA      5.3669009 84000011  USA   13196.1930
## 12       1881897               14540      2.4235336 84000012  USA    8762.0823
## 13        806938               10711      3.5979795 84000013  USA    7600.1305
## 14            NA                  NA      2.9126214 84099999  USA           NA
## 15         12378                  NA      2.0242915      316  GUM    7537.0367
## 16         75478                 110      2.0022247 84000015  USA    5330.8491
## 17         86345                 312      1.7098835 84000016  USA    4831.6653
## 18       1546031                  NA      4.8601850 84000017  USA   12200.5432
## 19        470535                7003      5.8290674 84000018  USA    6989.3003
## 20        295915                  NA      2.4754558 84000019  USA    9379.0312
## 21        167859                1128      1.9426591 84000020  USA    5761.7888
## 22        358491                2590      3.6633403 84000021  USA    8024.1101
## 23        696111                  NA      5.6885269 84000022  USA   14974.0126
## 24         93495                 346      3.2591664 84000023  USA    6955.3761
## 25        519473               10793      4.7441484 84000024  USA    8592.4660
## 26        835794               11319      7.4162349 84000025  USA   12126.1318
## 27       1016679                  NA      8.8025048 84000026  USA   10180.1698
## 28        585417                4010      4.1070072 84000027  USA   10380.4113
## 29        280188                3102      4.0128225 84000028  USA    9414.4480
## 30        361173                  NA      4.8528203 84000029  USA    5884.7615
## 31         82474                  97      2.5492468 84000030  USA    7716.6633
## 32        172798                1315      1.4127732 84000031  USA    8932.8621
## 33        267580                  NA      2.9137529 84000032  USA    8687.2223
## 34        116109                 562      6.3859405 84000033  USA    8539.2411
## 35       1387833               19841      8.7479992 84000034  USA   15624.8966
## 36        322959                1851      4.1663138 84000035  USA   15402.2574
## 37       3816485               89995      7.9984409 84000036  USA   19618.4390
## 38        871905                  NA      2.1719573 84000037  USA    8313.2915
## 39        103925                 226      2.2603720 84000038  USA   13637.3418
## 40          8217                  NA      6.6666667      580  MNP   14900.9865
## 41        756765                7681      5.5795186 84000039  USA    6474.1084
## 42        326015                1456      2.9736619 84000040  USA    8239.0040
## 43        217391                1022      2.4217720 84000041  USA    5154.2095
## 44        742982                  NA      7.3511901 84000042  USA    5803.6450
## 45          7189                  NA      2.1282515      630  PRI     245.0733
## 46        230508                1984      5.5638917 84000044  USA   21759.1548
## 47        359802                2622      2.1488595 84000045  USA    6988.1916
## 48         78893                 652      1.3620715 84000046  USA    8917.8994
## 49        748553                2564      1.4537489 84000047  USA   10961.1060
## 50       1775219                  NA      1.5997123 84000048  USA    6122.3144
## 51        328449                1396      0.7914692 84000049  USA   10244.9564
## 52         63865                  NA      4.6589018 84000050  USA   10234.9561
## 53          2827                  NA      7.4074074      850  VIR    2635.4551
## 54        628328                8823      2.8054944 84000051  USA    7361.3333
## 55        525802                4240      4.1257244 84000053  USA    6904.9165
## 56        166508                  NA      3.2838983 84000054  USA    9290.9789
## 57        552454                3393      2.8007065 84000055  USA    9488.3686
## 58         31823                 112      1.4114326 84000056  USA    5498.4890
##    Hospitalization_Rate
## 1              7.626760
## 2                    NA
## 3                    NA
## 4              6.245942
## 5              6.928045
## 6                    NA
## 7             16.720347
## 8             22.175669
## 9                    NA
## 10                   NA
## 11                   NA
## 12            10.306575
## 13            13.872555
## 14                   NA
## 15                   NA
## 16            12.235818
## 17             5.862458
## 18                   NA
## 19            15.586468
## 20                   NA
## 21             8.146169
## 22            17.003676
## 23                   NA
## 24            10.842996
## 25            16.162751
## 26            10.416226
## 27                   NA
## 28            11.280205
## 29            11.980535
## 30                   NA
## 31            11.239861
## 32             6.958040
## 33                   NA
## 34             9.779015
## 35            11.590588
## 36            15.674486
## 37            22.926384
## 38                   NA
## 39             6.466381
## 40                   NA
## 41            15.267646
## 42            11.245848
## 43            12.252727
## 44                   NA
## 45                   NA
## 46            11.908049
## 47             7.869148
## 48             9.759018
## 49             6.382555
## 50                   NA
## 51             6.616114
## 52                   NA
## 53                   NA
## 54            14.291499
## 55            13.353490
## 56                   NA
## 57            12.230112
## 58             7.904023
# Sort the rows by FIPS code. The column is indexed explicitly rather than
# through attach()/detach(): attach() puts every column on the search path,
# where a same-named object in the workspace silently takes precedence, and
# a failure before detach() would leave the data frame attached.
UScovid_dataset_fips <- UScovid_dataset[order(UScovid_dataset$FIPS), ]

# Copy FIPS into a lower-case `fips` column for the maps below.
# NOTE(review): presumably plot_usmap() matches regions on a column named
# `fips` -- confirm against the usmap documentation.
UScovid_dataset_fips$fips <- UScovid_dataset_fips$FIPS

# Map of total deaths per state, filled on a gradient from white up to the
# colour rgb(.2, .7, 1), which is also used for the state outlines.
plot_usmap(
  data = UScovid_dataset_fips,
  values = "Deaths",
  color = rgb(.2, .7, 1)
) +
  labs(
    title = "Covid Deaths by State",
    subtitle = "Count of Covid19 Deaths per state"
  ) +
  scale_fill_continuous(
    low = "white",
    high = rgb(.2, .7, 1),
    name = "Deaths per state",
    # Argument spelled out in full; `label` only worked through partial
    # argument matching.
    labels = scales::comma
  ) +
  theme(legend.position = "right")
## Warning: Use of `map_df$x` is discouraged. Use `x` instead.
## Warning: Use of `map_df$y` is discouraged. Use `y` instead.
## Warning: Use of `map_df$group` is discouraged. Use `group` instead.

# Same map with New York removed from the data.
plot_usmap(
  data = filter(UScovid_dataset_fips, Province_State != "New York"),
  values = "Deaths",
  color = rgb(.2, .7, 1)
) +
  labs(
    title = "Covid Deaths by State (New York Removed)",
    subtitle = "Count of Covid19 Deaths per state"
  ) +
  scale_fill_continuous(
    low = "white",
    high = rgb(.2, .7, 1),
    name = "Deaths per state",
    # Argument spelled out in full; `label` only worked through partial
    # argument matching.
    labels = scales::comma
  ) +
  theme(legend.position = "right")
## Warning: Use of `map_df$x` is discouraged. Use `x` instead.
## Warning: Use of `map_df$y` is discouraged. Use `y` instead.
## Warning: Use of `map_df$group` is discouraged. Use `group` instead.

# Same map with both New York and New Jersey removed from the data.
# The title now names both excluded states; it previously mentioned only
# New York, which made it identical to the preceding map's title.
plot_usmap(
  data = filter(
    UScovid_dataset_fips,
    Province_State != "New York" & Province_State != "New Jersey"
  ),
  values = "Deaths",
  color = rgb(.2, .7, 1)
) +
  labs(
    title = "Covid Deaths by State (New York and New Jersey Removed)",
    subtitle = "Count of Covid19 Deaths per state"
  ) +
  scale_fill_continuous(
    low = "white",
    high = rgb(.2, .7, 1),
    name = "Deaths per state",
    # Argument spelled out in full; `label` only worked through partial
    # argument matching.
    labels = scales::comma
  ) +
  theme(legend.position = "right")
## Warning: Use of `map_df$x` is discouraged. Use `x` instead.
## Warning: Use of `map_df$y` is discouraged. Use `y` instead.
## Warning: Use of `map_df$group` is discouraged. Use `group` instead.

Previous Work - Finding the top Countries by Confirmed Cases

Read in Files

Replace with a newer COVID file for the most up-to-date info

# Load the worldwide daily report. Point `file` at a newer report to refresh.
covid_raw_world <- read.csv(file = "06-28-2020.csv", header = TRUE)
# Interactive alternative (disabled): pick the file through a dialog.
#covid_ts_confirmed = read.csv(file.choose(),header = TRUE)
# Preview the first rows.
head(covid_raw_world)
##    FIPS    Admin2 Province_State Country_Region         Last_Update      Lat
## 1 45001 Abbeville South Carolina             US 2020-06-29 04:33:44 34.22333
## 2 22001    Acadia      Louisiana             US 2020-06-29 04:33:44 30.29506
## 3 51001  Accomack       Virginia             US 2020-06-29 04:33:44 37.76707
## 4 16001       Ada          Idaho             US 2020-06-29 04:33:44 43.45266
## 5 19001     Adair           Iowa             US 2020-06-29 04:33:44 41.33076
## 6 21001     Adair       Kentucky             US 2020-06-29 04:33:44 37.10460
##        Long_ Confirmed Deaths Recovered Active                  Combined_Key
## 1  -82.46171       103      0         0    103 Abbeville, South Carolina, US
## 2  -92.41420       812     36         0    776         Acadia, Louisiana, US
## 3  -75.63235      1039     14         0   1025        Accomack, Virginia, US
## 4 -116.24155      1841     23         0   1818                Ada, Idaho, US
## 5  -94.47106        15      0         0     15               Adair, Iowa, US
## 6  -85.28130       111     19         0     92           Adair, Kentucky, US
##   Incidence_Rate Case.Fatality_Ratio
## 1       419.9454            0.000000
## 2      1308.7275            4.433498
## 3      3215.1256            1.347449
## 4       382.2778            1.249321
## 5       209.7315            0.000000
## 6       578.0648           17.117117
# Total confirmed cases per country, sorted with the largest total first.
confirmed_by_country <- covid_raw_world %>%
  group_by(Country_Region) %>%
  tally(wt = Confirmed, sort = TRUE, name = "Confirmed")
head(confirmed_by_country)
## # A tibble: 6 x 2
##   Country_Region Confirmed
##   <chr>              <int>
## 1 US               2548996
## 2 Brazil           1344143
## 3 Russia            633563
## 4 India             548318
## 5 United Kingdom    312640
## 6 Peru              279419
# Total deaths per country, sorted with the largest total first.
deaths_by_country <- covid_raw_world %>%
  group_by(Country_Region) %>%
  tally(wt = Deaths, sort = TRUE, name = "Deaths")
head(deaths_by_country)
## # A tibble: 6 x 2
##   Country_Region Deaths
##   <chr>           <int>
## 1 US             125803
## 2 Brazil          57622
## 3 United Kingdom  43634
## 4 Italy           34738
## 5 France          29781
## 6 Spain           28343
# Join the two per-country summaries into one table keyed on the country.
totals <- merge(
  x = confirmed_by_country,
  y = deaths_by_country,
  by = "Country_Region"
)
head(totals)
##        Country_Region Confirmed Deaths
## 1         Afghanistan     30967    721
## 2             Albania      2402     55
## 3             Algeria     13273    897
## 4             Andorra       855     52
## 5              Angola       267     11
## 6 Antigua and Barbuda        69      3

Then reordered by Confirmed

# Scratch checks kept for reference:
#   order(totals$Confirmed, decreasing = TRUE)
#   totals$Confirmed
#   totals[180,]

# Rank the countries from most to fewest confirmed cases. Row names are
# carried over, so they still show each country's row number in `totals`.
top_to_least <- totals[
  order(totals[["Confirmed"]], decreasing = TRUE),
  ,
  drop = FALSE
]

head(top_to_least)
##     Country_Region Confirmed Deaths
## 180             US   2548996 125803
## 24          Brazil   1344143  57622
## 141         Russia    633563   9060
## 80           India    548318  16475
## 178 United Kingdom    312640  43634
## 135           Peru    279419   9317
# Deaths vs confirmed cases for the ten countries with the most confirmed cases.
top10Confirmed <- top_to_least %>%
  head(10) %>%
  ggplot(aes(x = Deaths, y = Confirmed, fill = Country_Region)) +
  geom_point(aes(fill = Country_Region)) +
  ggtitle("Deaths vs Confirmed Cases in Top countries")

ggplotly(top10Confirmed)

# An earlier version of this step excluded China, which had the most cases at
# the time, in order to look at the rest. The US now ranks first, so it is
# the country excluded here.
top10ConfirmedMinusUS <- top_to_least %>%
  subset(Country_Region != "US") %>%
  head(10) %>%
  ggplot(aes(x = Deaths, y = Confirmed, fill = Country_Region)) +
  geom_point(aes(fill = Country_Region)) +
  ggtitle("Deaths vs Confirmed Cases in Top Countries (Minus US)")
ggplotly(top10ConfirmedMinusUS)

# Exclude both the US and Brazil, the top two countries by confirmed cases.
top10ConfirmedMinusUSandB <- top_to_least %>%
  subset(Country_Region != "US" & Country_Region != "Brazil") %>%
  head(10) %>%
  ggplot(aes(x = Deaths, y = Confirmed, fill = Country_Region)) +
  geom_point(aes(fill = Country_Region)) +
  ggtitle("Deaths vs Confirmed Cases in Top Countries (Minus US and Brazil)")
ggplotly(top10ConfirmedMinusUSandB)